"""
@Author : 合肥一元数智教育科技有限公司
@Date :  2025/7/10 9:30
@Description : 
获取豆瓣图书的top250数据
 https://book.douban.com/top250?start=0
"""
import random
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Browser configuration: the 'detach' experimental option is switched on
# before the Chrome driver is created.
options = webdriver.ChromeOptions()
options.add_experimental_option('detach', True)
chrome = webdriver.Chrome(options=options)

# Open the first page of the Douban book Top 250 list, then maximise the window.
START_URL = 'https://book.douban.com/top250?start=0'
chrome.get(START_URL)
chrome.maximize_window()

# Accumulates one dict per scraped book; parse_html() appends to it.
books = []
def parse_html():
    """Scrape every book entry on the page currently loaded in ``chrome``.

    Each ``tbody`` element on the page is treated as one book. For every
    one, the text of the first descendant with class ``pl2`` (title),
    ``pl`` (publication info) and ``rating_nums`` (rating) is collected
    into a dict, which is appended to the module-level ``books`` list and
    printed, followed by a separator line.

    A missing descendant makes ``find_element`` raise, which propagates to
    the caller; the partially built record is not stored.
    """
    for row in chrome.find_elements(By.TAG_NAME, 'tbody'):
        # Values are evaluated top to bottom: title, publish, rating_nums.
        record = {
            'title': row.find_element(By.CLASS_NAME, 'pl2').text,
            'publish': row.find_element(By.CLASS_NAME, 'pl').text,
            'rating_nums': row.find_element(By.CLASS_NAME, 'rating_nums').text,
        }
        books.append(record)
        print(record)
        print('-----------------------------一本图书数据获取完毕----------------------------------')


# Scrape page after page until the page no longer offers a "后页>" link.
# The whole loop sits in try/finally so the browser is closed on every exit
# path, including an unexpected error raised while parsing or clicking.
try:
    while True:
        parse_html()
        # Pause for a random 1-3 seconds before looking for the next page.
        time.sleep(random.randint(1, 3))
        try:
            # Only the lookup is guarded: a missing link is the expected
            # end-of-data signal, any other failure should surface.
            next_link = chrome.find_element(By.LINK_TEXT, '后页>')
        except NoSuchElementException:
            print('------------------数据获取结束-----------------------')
            break
        next_link.click()
        # Pause again after navigating, before the next page is parsed.
        time.sleep(random.randint(1, 3))
finally:
    # Quit the browser whether the loop ended normally or with an error.
    chrome.quit()
